### Replication Code for Supplementary Material of Jim Crow paper with Robinson Data ###

# Uses R version 2.15.2

library(foreign)
# version 0.8-52
library(xtable)
# version 1.7-0


rd <- read.csv("datashare.csv")
st <- read.csv("stateLabels.csv",header=FALSE)


# Add state names to the data set #
colnames(st) <- c("num","name")
st$name  <- as.character(st$name)
stname <- rep("Maine",dim(rd)[1])
profbw <- rep(0,dim(rd)[1])
for(i in 1:dim(rd)[1]){
	stname[i] <- st$name[st$num == rd$state[i]]
	if(rd$race[i]==1){profbw[i] <- rd$total[i+1]/sum(rd$total[i:(i+2)])}
	if(rd$race[i]==2){profbw[i] <- rd$total[i]/sum(rd$total[(i-1):(i+1)])}
	if(rd$race[i]==3){profbw[i] <- rd$total[i-1]/sum(rd$total[(i-2):i])} 
}
rd$stname <- stname
rd$profbw <- profbw

# Matched State Subset
m <- subset(rd,stname=="Kansas"  | stname=="Wyoming"| stname=="Indiana"| stname=="Nevada")

# State-level variables
s.illit <- tapply(m$illit,m$state,sum)
s.total <- tapply(m$total,m$state,sum)
s.totalblk <- m$total[seq(3,dim(m)[1],3)]
s.proillit <- s.illit/s.total
s.perblk <- tapply(m$perblk,m$state,mean)
s.problk <- tapply(m$problk,m$state,mean)
s.jcrow <- tapply(m$jcrow,m$state,mean)
s.profbw <- tapply(m$profbw,m$state,mean)
s.jcproblk <- s.problk*s.jcrow
s.jcprofbw <- s.profbw*s.jcrow
s.stname <- unique(m$stname)

# true parameter values #
beta0full <- m$illprop[seq(1,10,3)]
beta1full <- m$illprop[seq(2,11,3)]/m$illprop[seq(1,10,3)]
beta2full <- m$illprop[seq(3,12,3)]/m$illprop[seq(1,10,3)]

t.w <- m$total[seq(1,10,3)] 
t.fbw <- m$total[seq(2,11,3)] 
t.b <- m$total[seq(3,12,3)] 
ill.w <- m$illit[seq(1,10,3)] 
ill.fbw <- m$illit[seq(2,11,3)]
ill.b <- m$illit[seq(3,12,3)] 

coefVec <- rep(0,4)
for(p in 1:4){
	n <- sum(c(t.w[p],t.fbw[p],t.b[p]))
	x1 <- rep(0,n)
	x1[(t.w[p]+1):(t.w[p]+t.fbw[p])] <- 1
	x2 <- rep(0,n)
	x2[(t.w[p]+t.fbw[p]+1):n] <- 1
	y <- rep(0,n)
	y[1:ill.w[p]] <- 1
	y[(t.w[p]+1):(t.w[p]+ill.fbw[p])] <- 1
	y[(t.w[p]+t.fbw[p]+1):(t.w[p]+t.fbw[p]+ill.b[p])] <- 1
	coefVec[p] <- glm(y~x2+x1,family=poisson)$coef[2]
	}


## Optimal Sampling for Combined Inference ##

# State Numbers
# Indiana 11
# Kansas 21
# Wyoming 41
# Nevada 46

## Conditional Expected Information Function ##
infoFunc <- function(betaw,betac,n00,n10,n01,n11,k){
	n <- n00+n10+n01
	n.nots = n - k
	xbar = (n11 + n10)/n
	zbar = (n11 + n01)/n
	k.xbar.seq <- 0:k
	k.zbar.seq <- 0:k 
	infoMat <- matrix(0,nr=k+1,nc=k+1)
	k11max<- matrix(0,nr=k+1,nc=k+1)
	for(i in 1:(k+1)){ 
		for(j in 1:((k+1)-(i-1))){
			k1. <- k.xbar.seq[i]
			k.1 <- k.zbar.seq[j]
			k11 <- 0
			k10= k.xbar.seq[i];n10.nots = n10-k10
			k01= k.zbar.seq[j] - k11;n01.nots = n01-k01
			k00 = k - (k10+k01) ;n00.nots = n00-k00
			pi.p <- n00 + n10*exp(betaw)+n01*exp(betac)
			pi.p.nots <- n00.nots + n10.nots*exp(betaw)+n01.nots*exp(betac)
			a <- n10.nots*exp(betaw) -(1/pi.p.nots)*(n10.nots*exp(betaw))^2
			b <- n01.nots*exp(betac) -(1/pi.p.nots)*(n01.nots*exp(betac))^2
			d <-  - (1/pi.p.nots)*(n10.nots*exp(betaw))*(n01.nots*exp(betac))
			e <- n10*exp(betaw) -(1/pi.p)*(n10*exp(betaw))^2
			f <- n01*exp(betac) -(1/pi.p)*(n01*exp(betac))^2
			g <-  - (1/pi.p)*(n10*exp(betaw))*(n01*exp(betac))
			Info <- matrix(c(e,g,g,f),nr=2) - matrix(c(a,d,d,b),nr=2)
			infoMat[i,j] <- Info[1,1] - Info[1,2]*Info[2,1]/Info[2,2]
			}
		}	
	return(infoMat)		
	}
##
	
## Numbers for Table 1 of Supplement ##

# Rows 1 and 2
betaw = 2.5
betac = 2.5
s.seq <- unique(m$state)
state.max <- matrix(0,nr=4,nc=2)
for(s in 1:4){
	infoMat <- infoFunc(betaw=betaw,betac=betac,n00=m$total[m$state==s.seq[s]][1],n10=m$total[m$state==s.seq[s]][3],n01=m$total[m$state==s.seq[s]][2],n11=0,k=100)
	state.max[s,] <- which(infoMat==max(infoMat),arr.ind=TRUE)
	}
sm <-state.max - 1
xtable(t(sm),digits=0)

# Rows 3 and 4
betaw = 2.0
betac = 2.0
s.seq <- unique(m$state)
state.max <- matrix(0,nr=4,nc=2)
for(s in 1:4){
	infoMat <- infoFunc(betaw=betaw,betac=betac,n00=m$total[m$state==s.seq[s]][1],n10=m$total[m$state==s.seq[s]][3],n01=m$total[m$state==s.seq[s]][2],n11=0,k=100)
	state.max[s,] <- which(infoMat==max(infoMat),arr.ind=TRUE)
	}
sm <-state.max - 1
xtable(t(sm),digits=0)

# Rows 5 and 6
betaw = 1.5
betac = 1.5
s.seq <- unique(m$state)
state.max <- matrix(0,nr=4,nc=2)
for(s in 1:4){
	infoMat <- infoFunc(betaw=betaw,betac=betac,n00=m$total[m$state==s.seq[s]][1],n10=m$total[m$state==s.seq[s]][3],n01=m$total[m$state==s.seq[s]][2],n11=0,k=100)
	state.max[s,] <- which(infoMat==max(infoMat),arr.ind=TRUE)
	}
sm <-state.max - 1
xtable(t(sm),digits=0)

# Rows 7 and 8
betaw = 1.0
betac = 1.0
s.seq <- unique(m$state)
state.max <- matrix(0,nr=4,nc=2)
for(s in 1:4){
	infoMat <- infoFunc(betaw=betaw,betac=betac,n00=m$total[m$state==s.seq[s]][1],n10=m$total[m$state==s.seq[s]][3],n01=m$total[m$state==s.seq[s]][2],n11=0,k=100)
	state.max[s,] <- which(infoMat==max(infoMat),arr.ind=TRUE)
	}
sm <-state.max - 1
xtable(t(sm),digits=0)

# Rows 9 and 10
betaw = 0.5
betac = 0.5
s.seq <- unique(m$state)
state.max <- matrix(0,nr=4,nc=2)
for(s in 1:4){
	infoMat <- infoFunc(betaw=betaw,betac=betac,n00=m$total[m$state==s.seq[s]][1],n10=m$total[m$state==s.seq[s]][3],n01=m$total[m$state==s.seq[s]][2],n11=0,k=100)
	state.max[s,] <- which(infoMat==max(infoMat),arr.ind=TRUE)
	}
sm <-state.max - 1
xtable(t(sm),digits=0)

# Rows 11 and 12
betaw = 0.0
betac = 0.0
s.seq <- unique(m$state)
state.max <- matrix(0,nr=4,nc=2)
for(s in 1:4){
	infoMat <- infoFunc(betaw=betaw,betac=betac,n00=m$total[m$state==s.seq[s]][1],n10=m$total[m$state==s.seq[s]][3],n01=m$total[m$state==s.seq[s]][2],n11=0,k=100)
	state.max[s,] <- which(infoMat==max(infoMat),arr.ind=TRUE)
	}
sm <-state.max - 1
xtable(t(sm),digits=0)
##


## Table 2 of the Supplement

s.seq <- unique(m$state)
state.max <- matrix(0,nr=4,nc=2)
for(s in 1:4){
	infoMat <- infoFunc(betaw=log(beta2full[s]),betac=log(beta1full[s]),n00=m$total[m$state==s.seq[s]][1],n10=m$total[m$state==s.seq[s]][3],n01=m$total[m$state==s.seq[s]][2],n11=0,k=100)
	state.max[s,] <- which(infoMat==max(infoMat),arr.ind=TRUE)
	}
sm <-state.max - 1
xtable(t(sm),digits=0)

## Checking the zero for Nevada in Table 2 using variance derived from joint expected information ##

s = 4
beta0 = log(beta0full[s])
betaw = log(beta2full[s])
betac = log(beta1full[s])
s.seq <- unique(m$state)
n = sum(m$total[m$state==s.seq[s]]) 
k = 100
n.nots = n - k
n10 = m$total[m$state==s.seq[s]][3]
n01 = m$total[m$state==s.seq[s]][2]
n00 = m$total[m$state==s.seq[s]][1]

# optimal result according to table 2

k00 <- 73
k10 <- 27
k01 <- 0
n10.nots = n10-k10
n01.nots = n01-k01
n00.nots = n00-k00				
a11 <- k00*exp(beta0) + k10*exp(beta0 + betaw)+k01*exp(beta0 + betac)
a21 <- a12 <- a22 <- k10*exp(beta0 + betaw)
a13 <- a31 <- a33 <- k01*exp(beta0 + betac)
pi.p.nots <- n00.nots*exp(beta0) + n10.nots*exp(beta0 + betaw)+n01.nots*exp(beta0 + betac)
b11 <- pi.p.nots^2
b22 <- (n10.nots*exp(beta0 + betaw))^2
b33 <- (n01.nots*exp(beta0 + betac))^2
b12 <- b21 <- pi.p.nots*(n10.nots*exp(beta0 + betaw))
b13 <- b31 <- pi.p.nots*(n01.nots*exp(beta0 + betac))
b23 <- b32 <- (n10.nots*exp(beta0 + betaw))*(n01.nots*exp(beta0 + betac))
jInfo <- matrix(c(a11,a21,a31,a12,a22,0,a13,0,a33),nr=3) + (1/pi.p.nots)* matrix(c(b11,b21,b31,b12,b22,b32,b13,b23,b33),nr=3) 
solve(jInfo)[2,2] # Variance of 9.32

# using foreign-born instead of native-born white 

k00 <- 0
k10 <- 27
k01 <- 73
n10.nots = n10-k10
n01.nots = n01-k01
n00.nots = n00-k00				
a11 <- k00*exp(beta0) + k10*exp(beta0 + betaw)+k01*exp(beta0 + betac)
a21 <- a12 <- a22 <- k10*exp(beta0 + betaw)
a13 <- a31 <- a33 <- k01*exp(beta0 + betac)
pi.p.nots <- n00.nots*exp(beta0) + n10.nots*exp(beta0 + betaw)+n01.nots*exp(beta0 + betac)
b11 <- pi.p.nots^2
b22 <- (n10.nots*exp(beta0 + betaw))^2
b33 <- (n01.nots*exp(beta0 + betac))^2
b12 <- b21 <- pi.p.nots*(n10.nots*exp(beta0 + betaw))
b13 <- b31 <- pi.p.nots*(n01.nots*exp(beta0 + betac))
b23 <- b32 <- (n10.nots*exp(beta0 + betaw))*(n01.nots*exp(beta0 + betac))
jInfo <- matrix(c(a11,a21,a31,a12,a22,0,a13,0,a33),nr=3) + (1/pi.p.nots)* matrix(c(b11,b21,b31,b12,b22,b32,b13,b23,b33),nr=3) 
solve(jInfo)[2,2] # Variance of 11.61

# zero for Nevada confirmed
##




## combined data log-likelihood function ##

logl <- function(theta,y,x1,x2,yp,t.wf,t.fbwf,t.bf){
b0 <- theta[1]
b1 <- theta[2]
b2 <- theta[3]
lpf <- t.wf*exp(b0) + t.fbwf*exp(b0 + b1) + t.bf*exp(b0 + b2)
retVal <- sum(log(dpois(x=y,lambda=exp(b0 + b1*x1 +b2*x2)))) + log(dpois(yp,lambda=lpf))
return(retVal)
}

### Sampling Distributions (300 observations) ###

## Figure 1 ##

# no restriction #
k <- 300
k.b <- rep(round(.333*k),4)
k.fbw <- rep(round(.333*k),4)
k.w <- rep(round(.333*k),4)
S <- 1000
coefmatInd <- matrix(NA,nc=4,nr=S)
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	coefmatInd[s,p] <- glm(y~x2+x1,family=poisson)$coef[2]
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat300eq <- coefmat
coefmatInd300eq <- coefmatInd

# with restriction #
k <- 300
k.b <- rep(round(.333*k),4)
k.fbw <- rep(round(.333*k),4)
k.w <- rep(round(.333*k),4)
S <- 1000
coefmatInd <- matrix(NA,nc=4,nr=S)
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.w[k.w > 0 & illsam.w==0] <- 1
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.fbw[k.fbw > 0 & illsam.fbw==0] <- 1
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
illsam.b[k.b > 0 & illsam.b==0] <- 1

t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	coefmatInd[s,p] <- glm(y~x2+x1,family=poisson)$coef[2]
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat300eq1 <- coefmat
coefmatInd300eq1 <- coefmatInd

#pdf("eq300.pdf")
par(mfrow=c(2,2),mar=c(4,3,3,1))
boxplot(coefmat300eq[,1],coefmat300eq1[,1],coefmatInd300eq[,1],coefmatInd300eq1[,1],main="Indiana",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[1]))

boxplot(coefmat300eq[,2],coefmat300eq1[,2],coefmatInd300eq[,2],coefmatInd300eq1[,2],main="Kansas",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[2]))

boxplot(coefmat300eq[,3],coefmat300eq1[,3],coefmatInd300eq[,3],coefmatInd300eq1[,3],main="Wyoming",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[3]))

boxplot(coefmat300eq[,4],coefmat300eq1[,4],coefmatInd300eq[,4],coefmatInd300eq1[,4],main="Nevada",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[4]))
#dev.off()




## Figure 2 ##

# no restriction #
k <- 300
k.b <- rep(round(.5*k),4)
k.fbw <- rep(5,4)
k.w <- rep(round(.5*k),4)
S <- 1000
coefmatInd <- matrix(NA,nc=4,nr=S)
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	coefmatInd[s,p] <- glm(y~x2,family=poisson)$coef[2]
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat300wb <- coefmat
coefmatInd300wb <- coefmatInd

# with restriction #
k <- 300
k.b <- rep(round(.5*k),4)
k.fbw <- rep(0,4)
k.w <- rep(round(.5*k),4)
S <- 1000
coefmatInd <- matrix(NA,nc=4,nr=S)
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.w[k.w > 0 & illsam.w==0] <- 1
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.fbw[k.fbw > 0 & illsam.fbw==0] <- 1
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
illsam.b[k.b > 0 & illsam.b==0] <- 1

t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	coefmatInd[s,p] <- glm(y~x2,family=poisson)$coef[2]
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat300wb1 <- coefmat
coefmatInd300wb1 <- coefmatInd

#pdf("wb300.pdf")
par(mfrow=c(2,2),mar=c(4,3,3,1))
boxplot(coefmat300wb[,1],coefmat300wb1[,1],coefmatInd300wb[,1],coefmatInd300wb1[,1],main="Indiana",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[1]))

boxplot(coefmat300wb[,2],coefmat300wb1[,2],coefmatInd300wb[,2],coefmatInd300wb1[,2],main="Kansas",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[2]))

boxplot(coefmat300wb[,3],coefmat300wb1[,3],coefmatInd300wb[,3],coefmatInd300wb1[,3],main="Wyoming",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[3]))

boxplot(coefmat300wb[,4],coefmat300wb1[,4],coefmatInd300wb[,4],coefmatInd300wb1[,4],main="Nevada",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[4]))
#dev.off()


## Figure 3 ##

# no restriction #
k <- 300
k.b <- c(floor(.69*k), floor(.71*k), floor(.37*k), floor(.32*k))
k.fbw <- k - k.b  
k.w <- rep(round(0*k),4) 
S <- 1000
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat300opt <- coefmat

# with restriction #
k <- 300
k.b <- c(floor(.69*k), floor(.71*k), floor(.37*k), floor(.32*k))
k.fbw <- k - k.b  
k.w <- rep(round(0*k),4) 
S <- 1000
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.w[k.w > 0 & illsam.w==0] <- 1
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.fbw[k.fbw > 0 & illsam.fbw==0] <- 1
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
illsam.b[k.b > 0 & illsam.b==0] <- 1

t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat300opt1 <- coefmat

#pdf("opt300.pdf")
par(mfrow=c(2,2),mar=c(4,3,3,1))
boxplot(coefmat300opt[,1],coefmat300opt1[,1],coefmat300eq[,1],coefmat300eq1[,1],main="Indiana",names=c("opt","opt (>0 ill)","eq","eq (>0 ill)"))
abline(h=log(beta2full[1]))

boxplot(coefmat300opt[,2],coefmat300opt1[,2],coefmat300eq[,2],coefmat300eq1[,2],main="Kansas",names=c("opt","opt (>0 ill)","eq","eq (>0 ill)"))
abline(h=log(beta2full[2]))

boxplot(coefmat300opt[,3],coefmat300opt1[,3],coefmat300eq[,3],coefmat300eq1[,3],main="Wyoming",names=c("opt","opt (>0 ill)","eq","eq (>0 ill)"))
abline(h=log(beta2full[3]))

boxplot(coefmat300opt[,4],coefmat300opt1[,4],coefmat300eq[,4],coefmat300eq1[,4],main="Nevada",names=c("opt","opt (>0 ill)","eq","eq (>0 ill)"))
abline(h=log(beta2full[4]))
#dev.off()



### Sampling Distributions (600 observations) ###

## Figure 4 ##

# no restriction #
k <- 600
k.b <- rep(round(.333*k),4)
k.fbw <- rep(round(.333*k),4)
k.w <- rep(round(.333*k),4)
S <- 1000
coefmatInd <- matrix(NA,nc=4,nr=S)
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	coefmatInd[s,p] <- glm(y~x2+x1,family=poisson)$coef[2]
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat600eq <- coefmat
coefmatInd600eq <- coefmatInd

# with restriction #
k <- 600
k.b <- rep(round(.333*k),4)
k.fbw <- rep(round(.333*k),4)
k.w <- rep(round(.333*k),4)
S <- 1000
coefmatInd <- matrix(NA,nc=4,nr=S)
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.w[k.w > 0 & illsam.w==0] <- 1
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.fbw[k.fbw > 0 & illsam.fbw==0] <- 1
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
illsam.b[k.b > 0 & illsam.b==0] <- 1

t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	coefmatInd[s,p] <- glm(y~x2+x1,family=poisson)$coef[2]
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat600eq1 <- coefmat
coefmatInd600eq1 <- coefmatInd

#pdf("eq600.pdf")
par(mfrow=c(2,2),mar=c(4,3,3,1))
boxplot(coefmat600eq[,1],coefmat600eq1[,1],coefmatInd600eq[,1],coefmatInd600eq1[,1],main="Indiana",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[1]))

boxplot(coefmat600eq[,2],coefmat600eq1[,2],coefmatInd600eq[,2],coefmatInd600eq1[,2],main="Kansas",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[2]))

boxplot(coefmat600eq[,3],coefmat600eq1[,3],coefmatInd600eq[,3],coefmatInd600eq1[,3],main="Wyoming",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[3]))

boxplot(coefmat600eq[,4],coefmat600eq1[,4],coefmatInd600eq[,4],coefmatInd600eq1[,4],main="Nevada",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[4]))
#dev.off()


## Figure 5 ##

# no restriction #
k <- 600
k.b <- rep(round(.5*k),4)
k.fbw <- rep(5,4)
k.w <- rep(round(.5*k),4)
S <- 1000
coefmatInd <- matrix(NA,nc=4,nr=S)
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	coefmatInd[s,p] <- glm(y~x2,family=poisson)$coef[2]
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat600wb <- coefmat
coefmatInd600wb <- coefmatInd

# with restriction #
k <- 600
k.b <- rep(round(.5*k),4)
k.fbw <- rep(0,4)
k.w <- rep(round(.5*k),4)
S <- 1000
coefmatInd <- matrix(NA,nc=4,nr=S)
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.w[k.w > 0 & illsam.w==0] <- 1
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.fbw[k.fbw > 0 & illsam.fbw==0] <- 1
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
illsam.b[k.b > 0 & illsam.b==0] <- 1

t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	coefmatInd[s,p] <- glm(y~x2,family=poisson)$coef[2]
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat600wb1 <- coefmat
coefmatInd600wb1 <- coefmatInd

#pdf("wb600.pdf")
par(mfrow=c(2,2),mar=c(4,3,3,1))
boxplot(coefmat600wb[,1],coefmat600wb1[,1],coefmatInd600wb[,1],coefmatInd600wb1[,1],main="Indiana",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[1]))

boxplot(coefmat600wb[,2],coefmat600wb1[,2],coefmatInd600wb[,2],coefmatInd600wb1[,2],main="Kansas",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[2]))

boxplot(coefmat600wb[,3],coefmat600wb1[,3],coefmatInd600wb[,3],coefmatInd600wb1[,3],main="Wyoming",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[3]))

boxplot(coefmat600wb[,4],coefmat600wb1[,4],coefmatInd600wb[,4],coefmatInd600wb1[,4],main="Nevada",names=c("com","com (>0 ill)","ind","ind (>0 ill)"))
abline(h=log(beta2full[4]))
#dev.off()

## Figure 6 ##

# no restriction #
k <- 600
k.b <- c(floor(.69*k), floor(.71*k), floor(.37*k), floor(.32*k))
k.fbw <- k - k.b  
k.w <- rep(round(0*k),4) 
S <- 1000
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat600opt <- coefmat

# with restriction #
k <- 600
k.b <- c(floor(.69*k), floor(.71*k), floor(.37*k), floor(.32*k))
k.fbw <- k - k.b  
k.w <- rep(round(0*k),4) 
S <- 1000
coefmat <- matrix(NA,nc=4,nr=S)
ptm <- proc.time()
set.seed(123)
for(s in 1:S){
illsam.w <- rhyper(nn=rep(1,4),m=m$illit[seq(1,10,3)],n=m$total[seq(1,10,3)]-m$illit[seq(1,10,3)],k=k.w) 
illsam.w[k.w > 0 & illsam.w==0] <- 1
illsam.fbw <- rhyper(nn=rep(1,4),m=m$illit[seq(2,11,3)],n=m$total[seq(2,11,3)]-m$illit[seq(2,11,3)],k=k.fbw)
illsam.fbw[k.fbw > 0 & illsam.fbw==0] <- 1
illsam.b <- rhyper(nn=rep(1,4),m=m$illit[seq(3,12,3)],n=m$total[seq(3,12,3)]-m$illit[seq(3,12,3)],k=k.b)
illsam.b[k.b > 0 & illsam.b==0] <- 1

t.w <- m$total[seq(1,10,3)] - k.w
t.fbw <- m$total[seq(2,11,3)] - k.fbw
t.b <- m$total[seq(3,12,3)] - k.b 
ill.w <- m$illit[seq(1,10,3)] - illsam.w
ill.fbw <- m$illit[seq(2,11,3)] - illsam.fbw
ill.b <- m$illit[seq(3,12,3)] - illsam.b
initB0 <- log(tapply(m$illit,m$state,sum)/tapply(m$total,m$state,sum))
for(p in 1:4){
	x1 <- rep(0,k)
	if(k.fbw[p] > 0){x1[(k.w[p]+1):(k.w[p]+k.fbw[p])] <- 1}
	x2 <- rep(0,k)
	if(k.b[p] > 0){ x2[(k.w[p]+k.fbw[p]+1):k] <- 1}
	y <- rep(0,k)
	if(illsam.w[p] > 0){y[1:illsam.w[p]] <- 1}
	if(illsam.fbw[p] > 0){y[(k.w[p]+1):(k.w[p]+illsam.fbw[p])] <- 1}
	if(illsam.b[p] > 0){y[(k.w[p]+k.fbw[p]+1):(k.w[p]+k.fbw[p]+illsam.b[p])] <- 1}
	yp <- sum(c(ill.w[p],ill.fbw[p],ill.b[p]))
	badStart = TRUE
	sinit <-0
	while(badStart){
		initPar <- c(log(beta0full[p]), log(beta1full[p]), log(beta2full[p])) + rnorm(n=3,mean=0,sd=sinit)
		if(logl(initPar,y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p]) > -99999){badStart = FALSE}
	sinit <- sinit + .01	
	}
		topt <- optim(par=initPar, logl, method="BFGS", control=list(fnscale=-1,maxit=500),y=y,x1=x1,x2=x2,yp=yp,t.wf=t.w[p],t.fbwf=t.fbw[p],t.bf=t.b[p])
	coefmat[s,p] <- topt$par[3]
	}
}
proc.time() - ptm

coefmat600opt1 <- coefmat

#pdf("opt600.pdf")
par(mfrow=c(2,2),mar=c(4,3,3,1))
boxplot(coefmat600opt[,1],coefmat600opt1[,1],coefmat600eq[,1],coefmat600eq1[,1],main="Indiana",names=c("opt","opt (>0 ill)","eq","eq (>0 ill)"))
abline(h=log(beta2full[1]))

boxplot(coefmat600opt[,2],coefmat600opt1[,2],coefmat600eq[,2],coefmat600eq1[,2],main="Kansas",names=c("opt","opt (>0 ill)","eq","eq (>0 ill)"))
abline(h=log(beta2full[2]))

boxplot(coefmat600opt[,3],coefmat600opt1[,3],coefmat600eq[,3],coefmat600eq1[,3],main="Wyoming",names=c("opt","opt (>0 ill)","eq","eq (>0 ill)"))
abline(h=log(beta2full[3]))

boxplot(coefmat600opt[,4],coefmat600opt1[,4],coefmat600eq[,4],coefmat600eq1[,4],main="Nevada",names=c("opt","opt (>0 ill)","eq","eq (>0 ill)"))
abline(h=log(beta2full[4]))
#dev.off()






## Alternate Matches and Expanded Analysis ##

m <- subset(rd,stname=="Kansas"  | stname=="Wyoming"|stname=="Iowa"| stname==  "Montana")

# State-level variables
s.illit <- tapply(m$illit,m$state,sum)
s.total <- tapply(m$total,m$state,sum)
s.totalblk <- m$total[seq(3,dim(m)[1],3)]
s.proillit <- s.illit/s.total
s.perblk <- tapply(m$perblk,m$state,mean)
s.problk <- tapply(m$problk,m$state,mean)
s.jcrow <- tapply(m$jcrow,m$state,mean)
s.profbw <- tapply(m$profbw,m$state,mean)
s.jcproblk <- s.problk*s.jcrow
s.jcprofbw <- s.profbw*s.jcrow
s.stname <- unique(m$stname)


# Figure 7

#pdf("altmatchlogfig.pdf")
plot(m$jcrow[seq(3,12,3)],log(m$illprop[seq(3,12,3)]/m$illprop[seq(1,10,3)]),type="n",xlim = c(-.2,1.2),ylab="log(Black Illiteracy Rate) - log(White Illiteracy Rate)",xlab="Jim Crow State",main="Individual-Level Data",xaxt="n",sub="(b)")
axis(side=1, at=c(0,1), labels=c("No","Yes"))
text(x=m$jcrow[seq(3,12,3)],y=log(m$illprop[seq(3,12,3)]/m$illprop[seq(1,10,3)]),labels = m$stname[seq(3,12,3)])
segments(x0=0,y0=log(m$illprop[m$stname=="Iowa"& m$race==3]/m$illprop[m$stname=="Iowa"& m$race==1])-.01,x1=1,y1=log(m$illprop[m$stname=="Kansas"& m$race==3]/m$illprop[m$stname=="Kansas"& m$race==1])+.01)
segments(x0=0,y0=log(m$illprop[m$stname=="Montana"& m$race==3]/m$illprop[m$stname=="Montana"& m$race==1])-.01,x1=1,y1=log(m$illprop[m$stname=="Wyoming"& m$race==3]/m$illprop[m$stname=="Wyoming"& m$race==1])+.01)
#dev.off()



## Expanded Matches ##

library(Matching)
library(optmatch)
rd.short <- rd[rd$race==1,]

mhd <- match_on(jcrow ~ problk + profbw, data=rd.short)

pm1 <- pairmatch(mhd, data=rd.short)

pm1.exp <- rep(pm1,rep(3,length(pm1)))

rd.m <- data.frame(rd,pm1.exp)

m <- subset(rd.m,!is.na(pm1.exp))
subset(rd.m,is.na(pm1.exp))


dim(m)

s.y <- log(m$illprop[seq(3,132,3)]/m$illprop[seq(1,130,3)])

s.jcrow <- tapply(m$jcrow,m$state,mean)

plot(s.jcrow,s.y)
abline(lm(s.y~s.jcrow))

# Figure 8

#pdf("expmatchlogfig.pdf")
plot(s.jcrow,s.y,xlim = c(-.2,1.2),ylab="log(Black Illiteracy Rate) - log(White Illiteracy Rate)",xlab="Jim Crow State",main="Individual-Level Data with Expanded Matches",xaxt="n")
axis(side=1, at=c(0,1), labels=c("No","Yes"))
abline(lm(s.y~s.jcrow))
#dev.off()


